(define-module (data-mining util)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-26)
  #:use-module (ice-9 receive)
  #:use-module (ice-9 threads)		;par-map
  #:export (scan
	    successive-apply
	    successive-filter

	    take-indices
	    gather
	    list-insert
	    list-split
	    list-mask
	    list-indices

	    borders
	    combine

	    recursive-map

	    for-each/key+value
	    map/key+value
	    filter-map/key+value

	    substitute-bindings
	    substitute-map

	    alist-merge
	    reverse-map

	    max*
	    min*
	    extremum*
	    max+value*
	    min+value*
	    extremum+value*
	    par-extremum+value*

	    permutations
	    k-combinations
	    powerset))

;; Utility Procedures and generics
;; ===============================

(define (scan proc lst)
  "Prefix scan across LST.  Apply PROC as (proc p e) for each e in
LST, and P being the result of the previous application of PROC,
except for the first application, when p is the head of LST."
  (reverse!
   (fold
    (lambda (e l) (cons (proc (car l) e) l))
    (list (car lst))
    (cdr lst))))

(define (successive-apply procs init)
  "Apply successive procedures from PROCS to LST, accumulating
intermediate results.  The first element of the returned list is the
result of applying the first procedure in PROCS to INIT.

e.g.:
> (define ps '(p1 p2 p3 p4))
> (successive-apply ps l)
  => ((p1 l) (p2 (p1 l)) (p3 (p2 (p1 l))) (p4 (p3 (p2 (p1 l)))))"
  (reverse!
   (fold
    (lambda (p l) (cons (p (car l)) l))
    (list ((car procs) init))
    (cdr procs))))

(define (successive-filter preds lst)
  "Apply successive filter predicates to LST"
  (successive-apply (map (lambda (p)
			   (cut filter p <>))
			 preds)
		    lst))

(define (list-insert l i e)
  "Insert into L at position I the element E"
  (cond ((null? l) (list e))
	((<= i 0) (cons e l))
	(else (cons (car l)
		    (list-insert (cdr l) (1- i) e)))))

(define (take-indices lst ind)
  "Return a subset of elements from LST.  The elements in the
resulting list are those which occupied indices in IND.  The indices
in IND need not be sorted."
  (define (take-indices-iter i inds l acc)
    (if (or (null? inds) (null? l))
	(reverse! acc)
	(if (= i (car inds))
	    (take-indices-iter (1+ i) (cdr inds) (cdr l) (cons (car l) acc))
	    (take-indices-iter (1+ i) inds (cdr l) acc))))
  (let ((sind (sort ind <)))
    (take-indices-iter 0 sind lst '())))

(define (gather lst ind)
  (map (cut list-ref lst <>) ind))

(define* (list-mask lst mask #:optional (dflt #f))
  "Return a new list of the same length as LST.  For each element
index i in MASK the returned list has a copy of the corresponding
element from LST.  Otherwise DFLT.

E.g. (list-mask '(0 1 2 3 4 5) '(1 2 5))
     => '(#f 1 2 #f #f 5)"
  (fold
   (lambda (i result)
     (list-set! result i (list-ref lst i))
     result)
   (make-list (length lst) dflt)
   mask))

(define (list-indices pred . lsts)
  "Returns a list of all indices for which (pred lst1 lst2 ...) is
satisfied.  Similar to list-index."
  (let loop ((count 0)
	     (result '())
	     (lsts lsts))
    (if (any null? lsts)
	(reverse! result)
	(loop (1+ count)
	      (if (apply pred (map car lsts))
		  (cons count result)
		  result)
	      (map cdr lsts)))))

(define (list-split lst n)
  "Split a list into n sublists of roughly equal size, and return a
new list containing those sublists as elements.

e.g.:

> (list-split (iota 10) 3)
  => ((0 1 2) (3 4 5) (6 7 8 9))"
  (if (or (null? lst) (= n 0))
      '()
      (let ((s (quotient (length lst) n)))
	(cons (list-head lst s)
	      (list-split (list-tail lst s)
			  (1- n))))))

(define* (borders lst #:optional (pred eq?))
  "Return the pairs of consecutive values (a . b) from LST for
which (PRED a b) does not return #t"
  (remove (lambda (e) (pred (car e) (cdr e)))
	  (combine cons lst 2)))

(define (combine proc lst n)
"Applies procedure PROC to groups of N consecutive elements from LST,
resulting in a new list with (- (length LST) N) elements."
  (define (combine-iter g l acc)
    (if (< (length l) n)
	(append acc (list (apply proc g))) ;do the last group
	(combine-iter
	 (append (cdr g) (list (list-ref l (1- n))))
	 (cdr l)
	 (append acc (list (apply proc g))))))
  (combine-iter (take lst n) (cdr lst) '()))

(define (recursive-map proc lst)
"Apply procedure PROC to elements in LST.  If an element, ELEM, is
itself a list, then recursively apply PROC to the elements of ELEM."
  (map (lambda (e)
	 (cond ((list? e) (recursive-map proc e))
	       ((pair? e) (list (proc (car e)) (proc (cdr e))))
	       (else (proc e))))
       lst))

(define (apply-pair proc)
  "Return a procedure that takes as argument a pair and applies the
arity-2 procedure PROC to the car and cdr of that pair."
  (lambda (p) (proc (car p) (cdr p))))

(define (for-each/key+value proc lst)
  "Where LST is a list of key+value pairs, call (PROC key value)"
  (for-each (apply-pair proc) lst))

(define (map/key+value proc lst)
  "Where LST is a list of key+value pairs, call (PROC key value) and
return a list of the results."
  (map (apply-pair proc) lst))

(define (filter-map/key+value proc lst)
  "Map PROC on LST but leave out applications that result in #f"
  (filter-map (apply-pair proc) lst))

(define (substitute-bindings lst bindings)
  "For every symbol that is a key in BINDINGS, substitute the
corresponding value into the resulting list."
  (recursive-map (lambda (a)
		   (if (symbol? a)
		       (let ((binding (assq a bindings)))
			 (if binding
			     (cdr binding)
			     a))
		       a))
		 lst))

(define (substitute-map lst mapping)
  "More general form of substitute-bindings that replaces matching key
from MAPPING into LST.  I.e. the keys in the MAPPING alist need not be
symbols."
  (recursive-map (lambda (a)
		   (let ((binding (assoc a mapping)))
		     (if binding
			 (cdr binding)
			 a)))
		 lst))

(define (alist-merge proc . rest)
"Merge a number of alists together.  If two alists share a common key,
then the corresponding values are merged by applying PROC to those
values."
  (define (merge-iter ks acc)
    (if (null? ks) acc
	(let* ((key (car ks))
	       (values (map cdr (filter-map (cut assoc key <>) rest))))
	  (merge-iter (cdr ks) (acons key (apply proc values) acc)))))
  (merge-iter (apply (cut lset-union equal? <...>)
		     (map (cut map car <>) rest))
	      '()))

(define (reverse-map alst)
"Reverse the mapping in ALST such that the values now point to the
keys.  The values in ALST must be unique

E.g: (reverse-map '((\"a\" . 1) (\"b\" . 2)))
     => ((1 . \"a\") (2 . \"b\"))"
  (define (swap-pair p) (cons (cdr p) (car p)))
  (map swap-pair alst))

(define (memoized-extremum+value* proc cmp x x* . rest)
  (if (null? rest)
      (values x x*)
      (let* ((y (car rest))
	     (y* (proc y)))
	(if (cmp x* y*)
	    (apply memoized-extremum+value* proc cmp x x* (cdr rest))
	    (apply memoized-extremum+value* proc cmp y y* (cdr rest))))))
(define extremum*
  (case-lambda
   ((proc cmp x) x)
   ((proc cmp x . rest)
    (receive (extremum value)
	(apply memoized-extremum+value* proc cmp x (proc x) rest)
      extremum))))
(define (extremum+value* proc cmp x . rest)
  (apply memoized-extremum+value* proc cmp x (proc x) rest))

(define (par-extremum+value* proc cmp . args)
  "Apply extremum+value* in parallel."
  ;; Do extremum+value* on chunks of args, then aggregate results.
  (let ((result
	 (apply extremum* cdr cmp
		(par-map
		 (lambda (lst)
		   (receive (x x*)
		       (apply extremum+value* proc cmp lst)
		     (cons x x*)))
		 (list-split args
			     ;; min ensures that each there's at least
			     ;; one element in each resulting list.
			     (min (length args)
				  (current-processor-count)))))))
    (values (car result) (cdr result))))

(define (max* proc . args)
  "Return the maximal element of the arguments given, where comparison
is based on the value returned by applying PROC to each.  PROC is not
called more than once for each argument."
  (apply extremum* proc > args))
(define (max+value* proc . args)
  "Like max* but returns two values: The maximal element, max, and the
value of (PROC MAX)."
  (apply extremum+value* proc > args))

(define (min* proc . args)
  "Like max* but return the minimal argument."
  (apply extremum* proc < args))
(define (min+value* proc . args)
  "Like max+value* but for the minimal argument."
  (apply extremum+value* proc < args))


(define (permutations lst)
  "Return a list where each element of the list is a permutation of the
input list LST."
  (cond ((null? lst) '(()))
	((= (length lst) 1) (list lst))
	(else
	 (let ((head (car lst))
	       (rec-perms (permutations (cdr lst))))
	   (concatenate
	    (map (lambda (p)
		   (map (cut list-insert p <> head)
			(iota (1+ (length p)))))
		 rec-perms))))))

;; This is SUPER expensive!!
(define (k-combinations lst k)
  ;; TODO: Try an iterative approach.  Get rid of 'drop' and 'iota',
  ;; which become expensive.
  (if (> k 0)
      (concatenate (map (lambda (e i)
			  (map (cut cons e <>)
			       (k-combinations (drop lst i)
					       (1- k))))
			lst
			(iota (length lst) 1)))
      '(())))

(define* (powerset lst #:optional (lower 0) (upper (length lst)))
  "Return the poweset of LST whose subsets have cardinality at least
LOWER and at most UPPER."
  (concatenate
   (map (cut k-combinations lst <>)
	(iota (- upper lower -1) lower))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Tests

(use-modules (srfi srfi-64))
(use-modules (ice-9 format))

(test-begin "util-test")

;;; Check min*, max*, and extremum*
(test-equal '(4 . "a") (max* car '(1 . "c") '(4 . "a")))
(receive (max value)
    (max+value* car '(1 . 20) '(4 . 2))
  (test-equal '(4 . 2) max)
  (test-equal 4 value))
(test-equal '(1 . "c") (min* car '(1 . "c") '(4 . "a")))
(receive (min value)
    (min+value* car '(1 . 20) '(4 . 2))
  (test-equal '(1 . 20) min)
  (test-equal 1 value))
(receive (min value)
    (extremum+value* cdr string<? '(1 . "c") '(4 . "a") '(0 . "b"))
  (test-equal '(4 . "a") min)
  (test-equal "a" value))

;;; Check substitute-bindings and substitute-map
(define vals '(("r" . 2) (foo . 20) ("bar" . "baz")))
(test-equal "substitute-bindings"
	    '(+ 20 20)
	    (substitute-bindings '(+ foo foo) vals))
(test-equal "substitute-map"
	    '(format #t "~a: ~a\n" "baz" (/ 2 20))
	    (substitute-map '(format #t "~a: ~a\n" "bar" (/ "r" foo)) vals))

;;; Check list-mask
(test-equal "list-mask"
	    '(#f 1 2 #f #f 5)
	    (list-mask '(0 1 2 3 4 5) '(1 2 5)))
(test-equal "list-mask with dflt"
	    '(0 1 2 0 0 5)
	    (list-mask '(0 1 2 3 4 5) '(1 2 5) 0))

;;; Check list-indices
(test-equal "list-indices"
	    '(0 2 4)
	    (list-indices even? '(0 1 2 3 4 5)))
(test-equal "list-indices"
	    '(1 3 5)
	    (list-indices odd? '(0 1 2 3 4 5)))
(test-equal "list-indices"
	    '(0 4)
	    (list-indices identity '(#t #f #f #f #t)))

(test-end "util-test")
